home *** CD-ROM | disk | FTP | other *** search
- TITLE SQRTI - INTEGER SQRT AND MAGNITUDE
-
- COMMENT $
-
- /* Routines for integer sqrt and magnitude for REND386 */
-
- // All code by Dave Stampe, last updated 23/12/93
- // These routines are (c) 1993 by Dave Stampe
-
- /*
- This code is part of the VR-386 project, created by Dave Stampe.
- VR-386 is a desendent of REND386, created by Dave Stampe and
- Bernie Roehl. Almost all the code has been rewritten by Dave
- Stampre for VR-386.
-
- Copyright (c) 1994 by Dave Stampe:
- May be freely used to write software for release into the public domain
- or for educational use; all commercial endeavours MUST contact Dave Stampe
- (dstampe@psych.toronto.edu) for permission to incorporate any part of
- this software or source code into their products! Usually there is no
- charge for under 50-100 items for low-cost or shareware products, and terms
- are reasonable. Any royalties are used for development, so equipment is
- often acceptable payment.
-
- ATTRIBUTION: If you use any part of this source code or the libraries
- in your projects, you must give attribution to VR-386 and Dave Stampe,
- and any other authors in your documentation, source code, and at startup
- of your program. Let's keep the freeware ball rolling!
-
- DEVELOPMENT: VR-386 is a effort to develop the process started by
- REND386, improving programmer access by rewriting the code and supplying
- a standard API. If you write improvements, add new functions rather
- than rewriting current functions. This will make it possible to
- include you improved code in the next API release. YOU can help advance
- VR-386. Comments on the API are welcome.
-
- CONTACT: dstampe@psych.toronto.edu
- */
-
- $
-
- .MODEL large
-
- .CODE INTMATH
-
- ; one iteration of 32->16 sqrt
- SQRT32 MACRO
- LOCAL skip
- shld edx,eax,2 ;; get 2 bits of input to error
- shl eax,2
- add ebx,ebx ;; estimate*2
- mov ecx,ebx ;; temp = est*2
- add ecx,ecx
- cmp edx,ecx ;; error>2*est?
- jle skip
- inc ebx ;; yes, update for new bit added
- inc ecx
- sub edx,ecx
- skip:
- ENDM
-
-
- ;long squareroot16(long arg)
-
- ; takes root of 32 bit number to 16 bit result
- ; about 220 clocks worst case:
- ; 3 us on 486/66 and 10 us on 386/25
-
-
- larg equ DWORD PTR [bp+8]
-
- PUBLIC _squareroot32
-
- _squareroot32 proc far
-
- .386
- push ebp
- mov ebp,esp
-
- push esi
- push ecx
-
- xor edx,edx
- xor ebx,ebx
-
- mov eax,DWORD PTR larg
- test eax,0FFFF0000h ; can we cut it in half?
- jne hasupper
-
- shl eax,16 ; yes, so prescale
- test eax,0FF000000h ; half again?
- jne do16
-
- shl eax,8 ; yes, prescale
- jmp do8 ; do 8 loops
-
- hasupper:
- test eax,0FF000000h ; half again?
- jne do32
- shl eax,8
- jmp do24
- do32:
- SQRT32
- SQRT32
- SQRT32
- SQRT32
- do24:
- SQRT32
- SQRT32
- SQRT32
- SQRT32
- do16:
- SQRT32
- SQRT32
- SQRT32
- SQRT32
- do8:
- SQRT32
- SQRT32
- SQRT32
- SQRT32
-
- mov eax,ebx
- shld edx,eax,16 ; returns in both eax and dx:ax
-
- pop ecx
- pop esi
-
- mov esp,ebp
- pop ebp
- ret
-
- _squareroot32 endp
-
-
- ; one iteration of 62->31 sqrt
- SQRT64 MACRO
- LOCAL skip
- shld edi,edx,2
- shld edx,eax,2 ; get 2 bits of input to error
- shl eax,2
- add ebx,ebx ; estimate*2
- mov ecx,ebx ; temp = est*2
- add ecx,ecx
- cmp edi,ecx ; error>2*est?
- jle skip
- inc ebx ; yes, update for new bit added
- inc ecx
- sub edi,ecx
- skip:
- ENDM
-
-
-
- ;long squareroot62(long hiarg, long loarg)
-
- ; takes root of 62 bit number to 31 bit result
- ; about 500 clocks worst case:
- ; 8 us on 486/66 and 20 us on 386/25
-
-
- hiarg equ DWORD PTR [bp+8]
- loarg equ DWORD PTR [bp+12]
-
- PUBLIC _squareroot62
-
- _squareroot62 proc far
-
- .386
- push ebp
- mov ebp,esp
-
- mov edx,DWORD PTR hiarg
- mov eax,DWORD PTR loarg
- or edx,edx
- jne dohigh
-
- push eax ; can use short root!
- call _squareroot32
- sub esp,4
- mov esp,ebp
- pop ebp
- ret
-
- dohigh:
- push ecx ; have to do 2 dwords
- push esi
- push edi
-
- xor edi,edi
- xor ebx,ebx
-
- shld edx,eax,2 ; prescale for 62 bits in 64 bit word
- shl eax,2
-
- test edx,0FFFF0000h ; can we cut it in half?
- jne hashigh
-
- shld edx,eax,16 ; yes, so prescale
- shl eax,16
- test edx,0FF000000h ; half again?
- jne do48 ; no, do 48 loops
-
- shld edx,eax,8
- shl eax,8 ; yes, prescale
- jmp do40 ; do 40 loops
-
- hashigh:
- test edx,0FF000000h ; half again?
- jne do64
- shld edx,eax,8
- shl eax,8
- jmp do56
-
- do64:
- SQRT64
- SQRT64
- SQRT64
- SQRT64
- do56:
- SQRT64
- SQRT64
- SQRT64
- SQRT64
- do48:
- SQRT64
- SQRT64
- SQRT64
- SQRT64
- do40:
- SQRT64
- SQRT64
- SQRT64
- SQRT64
-
- SQRT64
- SQRT64
- SQRT64
- SQRT64
-
- SQRT64
- SQRT64
- SQRT64
- SQRT64
-
- SQRT64
- SQRT64
- SQRT64
- SQRT64
-
- SQRT64
- SQRT64
- SQRT64
- ; one missing because of prescale
-
- mov eax,ebx
-
- pop edi
- pop esi
- pop ecx
-
- shld edx,eax,16 ; returns in both eax and dx:ax
-
- mov esp,ebp
- pop ebp
- ret
-
- _squareroot62 endp
-
-
- ;long magnitude32(long x, long y, long z)
-
- ; computes overall magnitude of vector
- ; no scaling or shortcuts: does 3x32-bit multiplies
- ; time: worst case of 650 clocks, best case of 200
- ; 3 to 10 us on 486/66, 8 to 25 us on 386/25
-
-
- x equ DWORD PTR [bp+8]
- y equ DWORD PTR [bp+12]
- z equ DWORD PTR [bp+16]
-
- PUBLIC _magnitude32
-
- _magnitude32 proc far
-
- .386
- push ebp
- mov ebp,esp
- push ecx
-
- mov eax,x ; sum of squares
- imul x
- mov ebx,eax
- mov ecx,edx
-
- mov eax,y
- imul y
- add ebx,eax
- adc ecx,edx
-
- mov eax,z
- imul z
- add ebx,eax
- adc ecx,edx
-
- push ebx ; square root
- push ecx
- call _squareroot62
- add esp,8
-
- pop ecx
- mov esp,ebp
- pop ebp
- ret
-
- _magnitude32 endp
-
-
- ;long magnitude16(int x, int y, int z)
-
- ; computes overall magnitude of vector
- ; no scaling or shortcuts: does 3x16-bit multiplies
- ; time: worst case of 300 clocks, best case of 150
- ; 2 to 5 us on 486/66, 6 to 12 us on 386/25
-
-
- x equ WORD PTR [bp+8]
- y equ WORD PTR [bp+10]
- z equ WORD PTR [bp+12]
-
- PUBLIC _magnitude16
-
- _magnitude16 proc far
-
- .386
- push ebp
- mov ebp,esp
- push ecx
-
- mov ax,x ; sum of squares
- imul x
- mov bx,ax
- mov cx,dx
-
- mov ax,y
- imul y
- add bx,ax
- adc cx,dx
-
- mov ax,z
- imul z
- add bx,ax
- adc cx,dx
-
- push cx ; square root
- push bx
- call _squareroot32
- add esp,4
-
- pop ecx
- mov esp,ebp
- pop ebp
- ret
-
- _magnitude16 endp
-
-
-
- ;void set_vector_length32(long length, long *xp, long *yp, long *zp)
-
- ; sets overall magnitude of vector
-
-
- length equ DWORD PTR [bp+8]
- xp equ DWORD PTR [bp+12]
- yp equ DWORD PTR [bp+16]
- zp equ DWORD PTR [bp+20]
-
-
- PUBLIC _set_vector_magnitude32
-
- _set_vector_magnitude32 proc far
-
- .386
- push ebp
- mov ebp,esp
- sub esp,20
-
- push ecx
- push edi
- push esi
-
- les bx,xp ; compute magnitude
- push DWORD PTR es:[bx]
- les bx,yp
- push DWORD PTR es:[bx]
- les bx,zp
- push DWORD PTR es:[bx]
-
- call _magnitude32
- add esp,12
- mov esi,eax
- or eax,eax
- je zero_magnitude
-
- les bx,xp ; scale each part
- mov eax, es:[bx]
- imul length
- idiv esi
- les bx,xp
- mov es:[bx],eax
-
- les bx,yp
- mov eax, es:[bx]
- imul length
- idiv esi
- les bx,yp
- mov es:[bx],eax
-
- les bx,zp
- mov eax, es:[bx]
- imul length
- idiv esi
- les bx,zp
- mov es:[bx],eax
-
- zero_magnitude:
- pop esi
- pop edi
- pop ecx
-
- mov esp,ebp
- pop ebp
- ret
-
- _set_vector_magnitude32 endp
-
-
-
- end